---
title: "APSR Replication"
author: "Fraga, Velez, West"
date: "2024-02-14"
output:
  html_document: default
  pdf_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(fig.width=16, fig.height=10, warning = FALSE, echo = FALSE, message = FALSE)
```

## Figure 1: Net Vote Shifts across Latino Subgroups

```{r fig1}
library(tidyverse); library(boot); library(survey); library(mirt);  library(modelsummary); library(tinytable); library(scales); library(data.table); library(grid); library(gridExtra); library(magrittr); library(hrbrthemes); library(fixest); library(knitr); library(kableExtra)

# load CES surveys
CES_2016 <- rio::import("2016_CES/CCES16_Common_OUTPUT_Feb2018_VV.dta")
CES_2020 <- rio::import("2020_CES/CES20_Common_OUTPUT_vv.dta")

# load weights
weights_2016 <- rio::import("weights/CES_2016_LatWeights.csv")
weights_2020 <- rio::import("weights/CES_2020_LatWeights.csv")

# subset on Latino and clean 2016 variables
CES_2016_sub <- CES_2016 %>%
  mutate(fips = as.numeric(countyfips)) %>%
  filter(race == 3 | hispanic == 1) %>%
  transmute(V101,
            state = inputstate,
            gender = as.numeric(gender == 2),
    voted = as.numeric(CL_E2016GVM != ""),
            vote.choice = ifelse(CC16_410a > 4, NA, 
                                 CC16_410a), 
            ideo5 = ifelse(ideo5 > 5, NA, ideo5),
            pid3 = ifelse(pid3 > 3, NA, pid3),
    religion = plyr::mapvalues(religpew, c(3:8, 9:11, 12, 98, 99), c(rep(3, 6), rep(4, 3), 3, NA, NA)),
           age_group = plyr::mapvalues(cut(2016 - birthyr,
                                    c(18, 35, 55, 100),
                                    include.lowest = F,
                                    right = F), c("[18,35)", "[35,55)", "[55,100)"),
                                c("age1834", "age3554", "age55plus")),
            imm.stat = ifelse(immstat > 5, NA, immstat),
            crime1 = as.numeric(CC16_334a == 2),
            crime2 = as.numeric(CC16_334b == 2),
            crime3 = as.numeric(CC16_334c == 1),
            crime4 = as.numeric(CC16_334d == 2),
            smedia1 = as.numeric(CC16_300d_1 == 1),
            smedia2 = as.numeric(CC16_300d_2 == 1),
            smedia3 = as.numeric(CC16_300d_3 == 1),
            smedia4 = as.numeric(CC16_300d_4 == 1),
            smedia5 = as.numeric(CC16_300d_5 == 1),
            tv = as.numeric(CC16_300_2 == 1),
            mig1 = as.numeric(CC16_331_1 == 2),
            mig2 = as.numeric(CC16_331_2 == 1),
            mig3 = as.numeric(CC16_331_3 == 2),
            mig4 = as.numeric(CC16_331_7 == 1),
            mig8 = as.numeric(CC16_331_5 == 1),
            mig9 = as.numeric(CC16_331_6 == 2),
            mig10 = as.numeric(CC16_331_8 == 1),
            mig11 = as.numeric(CC16_331_4 == 1),
            educ = ifelse(educ > 6, NA, educ),
            faminc = ifelse(faminc > 16, NA, faminc), 
            mexican = as.numeric(Hispanic_origin_3 == 1),
            pr = as.numeric(Hispanic_origin_4 == 1),
            cuban = as.numeric(Hispanic_origin_5 == 1),
            weight = commonweight,
            vvweight = commonweight_vv) %>%
  left_join(weights_2016)

# subset on Latino and clean 2020 variables
CES_2020_sub <- CES_2020 %>%
  filter(race == 3 | hispanic == 1) %>%
  mutate(fips = as.numeric(countyfips)) %>%
  transmute(caseid,
            state = inputstate,
            gender = as.numeric(gender == 2),
    voted = as.numeric(!is.na(CL_2020gvm)),
         vote.choice = ifelse(CC20_410 > 4, NA, 
                              CC20_410), #1
         vote.choice = plyr::mapvalues(vote.choice,
                                       c(1,2),
                                       c(2,1)),
         retro.econ = ifelse(CC20_302 > 5, NA, CC20_302),
         ideo5 = ifelse(ideo5 > 5, NA, ideo5),
    religion = plyr::mapvalues(religpew, c(3:8, 9:11, 12, 98, 99), c(rep(3, 6), rep(4, 3), 3, NA, NA)),
    age_group = plyr::mapvalues(cut(2020 - birthyr,
                                    c(18, 35, 55, 100),
                                    include.lowest = F,
                                    right = F), c("[18,35)", "[35,55)", "[55,100)"),
                                c("age1834", "age3554", "age55plus")),
         pid3 = ifelse(pid3 > 3, NA, pid3),
         imm.stat = ifelse(immstat > 5, NA, immstat),
         crime1 = as.numeric(CC20_334a == 2),
         crime2 = as.numeric(CC20_334b == 2),
         crime3 = as.numeric(CC20_334c == 1),
         crime5 = as.numeric(CC20_334d == 2),
         crime6 = as.numeric(CC20_334e == 2),
         crime7 = as.numeric(CC20_334f == 2),
         crime8 = as.numeric(CC20_334g == 2),
         crime9 = as.numeric(CC20_334h == 2),
         smedia1 = as.numeric(CC20_300d_1 == 1),
         smedia2 = as.numeric(CC20_300d_2 == 1),
         smedia3 = as.numeric(CC20_300d_3 == 1),
         smedia4 = as.numeric(CC20_300d_4 == 1),
         smedia5 = as.numeric(CC20_300d_5 == 1),
         tv = as.numeric(CC20_300_2 == 1),
         mig1 = as.numeric(CC20_331a == 2),
         mig2 = as.numeric(CC20_331b == 1),
         mig5 = as.numeric(CC20_331c == 1),
         mig6 = as.numeric(CC20_331d == 1),
         mig7 = as.numeric(CC20_331e == 1),
         educ = ifelse(educ > 6, NA, educ), 
         faminc = ifelse(faminc_new > 16, NA, faminc_new), 
         mexican = as.numeric(CC20_hisp_3 == 1),
         pr = as.numeric(CC20_hisp_4 == 1),
         cuban = as.numeric(CC20_hisp_5 == 1),
         weight = commonweight,
         vvweight = vvweight) %>%
  left_join(weights_2020)

CES_2016_sub[which(is.na(CES_2016_sub$eb_weights)), 
             "eb_weights"] <- mean(CES_2016_sub$eb_weights, na.rm = T)

CES_2020_sub[which(is.na(CES_2020_sub$eb_weights)), 
             "eb_weights"] <- mean(CES_2020_sub$eb_weights, na.rm = T)

# stack CES surveys
CES_sub <- bind_rows(CES_2016_sub %>% mutate(year = 2016), 
                     CES_2020_sub %>% mutate(year = 2020))

CES_sub <- CES_sub %>% mutate(State = plyr::mapvalues(state, 
                        sjlabelled::get_values(CES_2016$inputstate), 
                        sjlabelled::get_labels(CES_2016$inputstate))) %>%
  mutate(region = state.region[match(State, state.name)])

# additional cleaning
CES_sub <- CES_sub %>% 
  mutate(fam5 = ntile(faminc, 5)) %>%
  bind_cols(police = fscores(mirt(CES_sub %>% 
            dplyr::select(starts_with("crime")),
            model = 1, verbose = FALSE, itemtype = "2PL"))[,1],
            media = fscores(mirt(CES_sub %>% 
            dplyr::select(starts_with("smedia")),
            model = 1, verbose = FALSE, itemtype = "2PL"))[,1],
            imm = fscores(mirt(CES_sub %>% 
            dplyr::select(starts_with("mig")),
            model = 1, verbose = FALSE, itemtype = "2PL"))[,1]) %>%
  mutate(educ = plyr::mapvalues(educ, 1:6, c(1,1,2,2,3,3))) %>%
  mutate(region = plyr::mapvalues(region, c("Northeast", "South",
                                            "North Central", "West"),
                                  c(1, 2, 3, 4))) %>%
  mutate(age_group = as.numeric(age_group))

# Estimate IRT models
CES_sub$police <- ntile(CES_sub$police, 4)
CES_sub$imm <- ntile(CES_sub$imm, 4)
CES_sub$media <- ntile(CES_sub$media, 4)
CES_sub$gender <- CES_sub$gender+1

# implement grimmer marble tanigawa-lau method
GMTL <- function(data, indices, variable, value, ...) {

  # for bootstrapping
  data <- data[indices,]
  
  # Set up weights for 2016
  wgs.2016 <- svydesign(ids = ~1, weights = ~ eb_weights, 
                        data = data %>% filter(year == 2016) %>% 
                          mutate(vote.choice.1 = as.numeric(vote.choice == 1),
              vote.choice.2 = as.numeric(vote.choice == 2),
              vote.subgroup = as.numeric(voted == 1 & get(variable) == value),
              subgroup = as.numeric(get(variable) == value)))
  
  # Set up weights for 2020
  wgs.2020 <- svydesign(ids = ~1, weights = ~ eb_weights, 
                        data = data %>% filter(year == 2020) %>%
                          mutate(vote.choice.1 = as.numeric(vote.choice == 1),
                                 vote.choice.2 = as.numeric(vote.choice == 2),
                  vote.subgroup = as.numeric(voted == 1 & get(variable) == value),
                      subgroup = as.numeric(get(variable) == value)))
  
  # calculate weighted estimate of Pr(Vote Choice | Subgroup, 2016)
  vote1.2016 <- sum(coef(svyglm(vote.choice.1 ~ subgroup, wgs.2016,
                                subset = voted == 1)))
  vote2.2016 <- sum(coef(svyglm(vote.choice.2 ~ subgroup, wgs.2016,
                                subset = voted == 1)))
  
  # calculate weighted estimate of Pr(Vote Choice | Subgroup, 2020)
  vote1.2020 <- sum(coef(svyglm(vote.choice.1 ~ subgroup, wgs.2020,
                                subset = voted == 1)))
  vote2.2020 <- sum(coef(svyglm(vote.choice.2 ~ subgroup, wgs.2020,
                                subset = voted == 1)))
  
  # calculate weighted estimate of Pr(Turnout | Subgroup, 2016)
  turnout.2016 <- sum(coef(svyglm(voted ~ subgroup, wgs.2016)))
  # calculate weighted estimate of Pr(Turnout | Subgroup, 2020)  
  turnout.2020 <- sum(coef(svyglm(voted ~ subgroup, wgs.2020)))
  
  # calculate weighted estimate of Pr(Subgroup | 2016)
  px.2016 <- svymean(~subgroup, wgs.2016, na.rm = T)
  # calculate weighted estimate of Pr(Subgroup | 2020)
  px.2020 <- svymean(~subgroup, wgs.2020, na.rm = T)
  
  # Return net vote variable and individual variables used to construct it
  return(c((((vote1.2020 - vote2.2020) * turnout.2020 * px.2020) - 
              ((vote1.2016 - vote2.2016) * turnout.2016 * px.2016)),
           vote1.2016, vote1.2020, vote2.2016, vote2.2020,
           turnout.2016, turnout.2020, px.2016, px.2020))
  
}

set.seed(33014)

# begin bootstrapping routine
t1_1620 <- CES_sub %>%
  summarise_each(~n_distinct(.x, na.rm = T)) %>% 
  # select subgroup vars
  dplyr::select(imm.stat, pid3, ideo5, age_group, gender,
                educ, fam5, police, imm, 
                mexican, cuban, pr,
                region, media, religion) %>%
  gather(x, y) %>% 
  dplyr::group_by(x) %>%
  dplyr::summarize(value = 1:y) %>%
  dplyr::rename(variable = x) %>%
  ungroup() %>%
  as.list() %>%
  pmap(\(variable, value) broom::tidy(boot(statistic = GMTL, 
                                           variable = variable, 
                                           value = value, 
                               R = 1000, data = CES_sub,
                               ncpus = 10,
                               parallel = "multicore")) %>% 
         mutate(term = c("shift", 
                         "vote1.2016", "vote1.2020", 
                         "vote2.2016", "vote2.2020", 
                         "turnout.2016", "turnout.2020", 
                         "px.2016", "px.2020")) %>% 
         dplyr::select(term, statistic, std.error) %>%
         bind_cols(variable = variable, value = value)) %>% 
  bind_rows() 

# set up data for plots
t1_1620 %>%
 filter(variable == "mexican" & value == 1 |
          variable == "cuban" & value == 1 |
          variable == "pr" & value == 1 |
          variable != "pr" & variable != "mexican" & variable != "cuban") -> t1_1620.r

t1_1620.r[which(t1_1620.r$variable == "cuban"),"value"] <- 2
t1_1620.r[which(t1_1620.r$variable == "pr"),"value"] <- 3

t1_1620.r %>%
  filter(variable != "imm.stat" | 
         variable == "imm.stat" & value != 2) %>%
  mutate(variable = plyr::mapvalues(variable, c("mexican", "cuban", "pr"),
                                    c("ancestry", "ancestry", "ancestry"))) %>%
  nest(statistic, std.error, .key = 'value_col') %>%
  spread(key = term, value = value_col) %>%
  mutate(value = ifelse(variable == "educ", plyr::mapvalues(value, 
                                                            1:3,
                                                            c("  No college", 
                                                              " Some College", 
                                                              "College+")),
                        value)) %>%
  mutate(value = ifelse(variable == "age_group", plyr::mapvalues(value, 
                                                            1:3,
                                                            c("18-34", 
                                                              "34-55", 
                                                              "55+")),
                        value)) %>%
  mutate(value = ifelse(variable == "fam5", plyr::mapvalues(value, 1:5,
                                                            c("Quantile 1", 
                                                              "Quantile 2", 
                                                              "Quantile 3", 
                                                              "Quantile 4", 
                                                              "Quantile 5")),
                        value)) %>%
  mutate(value = ifelse(variable == "ideo5", plyr::mapvalues(value, 1:5,
                                                            c("   Very\nLiberal", 
                                                              " Liberal", 
                                                              " Mod.", 
                                                              "Conservative", 
                                                              "Very\nConservative")),
                        value)) %>%
  mutate(value = ifelse(variable == "imm.stat", plyr::mapvalues(value, 1:5,
                                                             c(" Immigrant\nCitizen", 
                                                               "", 
                                                               "1st\nGeneration", 
                                                               "2nd\nGeneration", 
                                                               "3rd\nGeneration")),
                        value)) %>% 
  mutate(value = ifelse(variable == "pid3", plyr::mapvalues(value, 1:3,
                                                                c("Democrat",
                                                                  "Republican",
                                                                  "Independent")),
                        value)) %>%
  mutate(value = ifelse(variable == "police", plyr::mapvalues(value, 1:5,
                                                              c("Quantile 1", 
                                                                "Quantile 2", 
                                                                "Quantile 3", 
                                                                "Quantile 4", 
                                                                "Quantile 5")),
                        value)) %>%
  mutate(value = ifelse(variable == "region", plyr::mapvalues(value, 1:4,
                                                              c("Northeast", "South",
                                                                "North Central", "West")),
                        value)) %>%
  mutate(value = ifelse(variable == "imm", plyr::mapvalues(value, 1:5,
                                                              c("Quantile 1", 
                                                                "Quantile 2", 
                                                                "Quantile 3", 
                                                                "Quantile 4", 
                                                                "Quantile 5")),
                        value)) %>%
 mutate(value = ifelse(variable == "media", plyr::mapvalues(value, 1:5,
                                                          c("Quantile 1", 
                                                            "Quantile 2", 
                                                            "Quantile 3", 
                                                            "Quantile 4", 
                                                            "Quantile 5")),
                       value)) %>%
  mutate(value = ifelse(variable == "ancestry", plyr::mapvalues(value, 1:3,
                                                           c("Mexican", 
                                                             "Cuban", 
                                                             "Puerto\nRican")),
                        value)) %>%
 mutate(value = ifelse(variable == "gender", plyr::mapvalues(value, 1:2,
                                                               c("M", 
                                                                 "F")),
                       value)) %>%
 mutate(value = ifelse(variable == "religion", plyr::mapvalues(value, 1:4,
                                                             c("Protestant", 
                                                               "Catholic",
                                                               "Other",
                                                               "Atheist/Agnostic")),
                       value)) %>%
  mutate(variable = plyr::mapvalues(variable, c("imm", "police",
                                               "ideo5", "fam5",
                                               "educ", "pid3", "imm.stat",
                                               "media", "ancestry", "age_group",
                                               "region", "gender",  "religion"), c("Immigration Restrictionism",
                                                         "Conservative Crime Policy Support", "Ideology",
                                                         "Income", "Education", "Party",
                                                         "Generational Status", "Social Media Usage",
                                                         "Ancestry", "Age Group",
                                                         "Region", "Sex", "Religion"))) %>%
  unnest(px.2016, px.2020, 
         shift, turnout.2016, turnout.2020,
         vote1.2016, vote2.2016,
         vote1.2020, vote2.2020, .sep = '_') %>%
  transmute(variable, value, `Net R vote increase among Latinos` =
              shift_statistic,
            `Difference in subgroup R support` = 
              vote1.2020_statistic - vote1.2016_statistic,
            `Difference in subgroup turnout` = 
              turnout.2020_statistic - turnout.2016_statistic,
            `Difference in subgroup composition` = 
              px.2020_statistic - px.2016_statistic,
            `Difference in subgroup R-D margin` =
              (vote1.2020_statistic - vote2.2020_statistic) -
              (vote1.2016_statistic - vote2.2016_statistic),
            `Proportionate change in subgroup R support` = 
              (vote1.2020_statistic - vote1.2016_statistic)/vote1.2016_statistic,
            `Proportionate change in R-D margin` = 
              ((vote1.2020_statistic - vote2.2020_statistic) -
              (vote1.2016_statistic - vote2.2016_statistic))/(vote1.2016_statistic - vote2.2016_statistic),
            SE = shift_std.error) %>%
  gather(term, estimate, 
  `Net R vote increase among Latinos`:
    `Proportionate change in R-D margin`) -> t2_1620

kable(t2_1620, "html") %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "750px", height = "500px")

# net votes plot (2016-2020)
p1 <- t2_1620 %>%
 filter(value != "") %>%
 filter(term == "Net R vote increase among Latinos") %>%
 mutate(variable = fct_reorder(variable, -abs(estimate))) %>%
 mutate(` ` = term) %>%
 ggplot(aes(x = value, y = estimate, group = ` `)) +
 geom_pointrange(aes(ymin = estimate - 1.96*SE,
                 ymax = estimate + 1.96*SE)) +
 geom_line() +
 facet_wrap(. ~ variable, scales = "free") +
 # Note: theme_ipsum_rc from the hrbrthemes package is used in the manuscript and appendix
 # However, it requires the Roboto font to be installed. theme_minimal is used a placeholder theme
 # that maximizes compatibility across different machines.
 theme_minimal() +
 geom_hline(yintercept = 0, linetype = "dashed") +
 guides(shape=guide_legend(nrow=2,byrow=TRUE)) +
 scale_alpha(guide = 'none') +
 scale_shape_discrete(c(1,2,3,4)) +
 labs(x = "", y = "Increase in Net Republican Votes") +
 theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       axis.text.x = element_text(size = 8),
       legend.position = "bottom")

p1

```

## Figure 2: Decomposing Net Votes

```{r fig2}

# constituent estimates of net vote increase (2016-2020)
p2 <- t2_1620 %>%
 filter(value != "") %>%
 mutate(t_stat = as.numeric(abs(estimate/SE) >= 1.5)) %>%
 filter(term == "Net R vote increase among Latinos") %>%
 group_by(variable) %>%
 dplyr::summarize(t_max = sum(t_stat)) %>%
 filter(t_max > 0) %>%
 select(variable) %>%
 left_join(t2_1620) %>%
 filter(term == "Difference in subgroup composition" |
         term == "Difference in subgroup R support" |
         term == "Difference in subgroup turnout") %>%
 mutate(estimate_r = round(estimate, 2)) %>%
 mutate(` ` = term) %>%
 ggplot(aes(x = value, y = estimate, group = ` `, color = ` `)) +
 geom_point(size = 4) +
 geom_line() +
 facet_wrap(. ~ variable, scales = "free") +
 theme_minimal() +
 geom_hline(yintercept = 0, linetype = "dashed") +
 guides(shape=guide_legend(nrow=2,byrow=TRUE)) +
 scale_alpha(guide = 'none') +
 scale_shape_discrete(c(1,2,3,4)) +
 scale_color_grey() +
 labs(x = "", y = "Percentage Point Change") +
 theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       axis.text.x = element_text(size = 14),
       axis.text.y = element_text(size = 14),
       legend.text = element_text(size = 14),
       axis.title.y = element_text(size = 14),
       axis.title.x = element_text(size = 14),
       legend.position = "bottom")

p2
```

## Figure 3: Tract-level Trump Voteshare Change

```{r fig3}


# Load merged Voterfile and VEST data
data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv")

# Create variables 
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16
data$Nsize <- data$TotalVotes16
data$Nsize[data$Nsize > 10000] <- 10000 # topcode for point sizing ONLY

displayprefs <-  theme_bw() + theme(panel.grid.minor=element_blank(), 
                                    panel.grid.major.y=element_blank(), 
                                    panel.grid.major.x=element_blank(),
                                    panel.border=element_blank(),
                                    axis.text.x=element_text(family="Helvetica", size=12, color="black"),
                                    axis.title.x=element_text(family="Helvetica", size=14, color="black", face="bold"),
                                    axis.title.y=element_text(family="Helvetica", size=12, color="black", vjust=1.1),
                                    axis.title.y.right=element_text(family="Helvetica", size=12, color="blue", vjust=1.1),
                                    axis.text.y=element_text(family="Helvetica", size=12, color="black"),
                                    axis.text.y.right=element_text(family="Helvetica", size=10, color="blue"),
                                    axis.line=element_line(color="black", size=0.5),
                                    axis.ticks.x=element_line(color="black", size=1),
                                    axis.ticks.y.right=element_line(color="blue", size=0.5),
                                    plot.caption=element_text(color="gray40"),
                                    plot.subtitle=element_text(family="Helvetica", size=8, vjust=0),
                                    plot.title=element_text(family="Helvetica", size=14, vjust=0, face="bold"),
                                    legend.text=element_text(family="Helvetica", size=12, color="black"),
                                    legend.position="none",
                                    legend.title=element_blank())

fig3 <- ggplot(subset(data, TotalVotes16 > 100), aes(x = Hispanic20_Pct, y = Trump20_Pct-Trump16_Pct, size=Nsize/100))
fig3 <- fig3 + geom_point(alpha=0.1)
fig3 <- fig3 + geom_segment(y=0, yend=0, x=-Inf, xend=Inf, size=0.25, color="red", alpha=0.25, linetype = 2, inherit.aes=FALSE)
fig3 <- fig3 + geom_smooth(size=2)
fig3 <- fig3 + scale_x_continuous(name="% of 2020 Voters who were Latinx", labels=percent_format(1), limits=c(0,1)) + scale_y_continuous(name="2020 Trump Voteshare, vs 2016", labels=label_number(accuracy=1, scale=100, style_positive="plus", style_negative = "minus"), breaks=seq(-0.4,0.4,0.1))
fig3 <- fig3 + coord_cartesian(ylim=c(-0.4, 0.4))
fig3 <- fig3 + displayprefs

fig3
```

## Table 1: Tract-level Regressions of Trump 2020 voteshare

```{r tab1}

data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv") # 67,838 observations

## Create variables in VEST/Voter File data##
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$Hisp_NotAgeIn_Pct <- data$Hispanic20Only_22plus/data$Hispanic20Only
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16
data$TrumpChg <- data$Trump20_Pct - data$Trump16_Pct
data$deviation20 <- data$TotalVotes20/data$Total20

## Merge in 2016-2020 Census ACS demographic data at tract level ##
library(stringr)
data$GEOID <- paste0("14000US", str_pad(data$stfips, 2, "left", pad="0"), str_pad(data$county, 3, "left", pad="0"), str_pad(data$tract, 6, "left", pad="0"))

#B19113I - 2016-2020 Median Family Income (Hispanic head of household only, in 2020 inflation-adjusted $)
faminc <- read.csv("vf_vest_census/B19113I_Tract.csv")
faminc <- subset(faminc, select=c(GEOID, B19113I_001))
faminc <- subset(faminc, !is.na(B19113I_001))
names(faminc)[2] <- "MedHHInc_Latino"
data <- merge(data, faminc, by="GEOID", all.x=TRUE, sort=FALSE)

#B05003I - 2016-2020 Sex by Age by Nativity and Citizenship Status (Hispanic only)
cit <- read.csv("vf_vest_census/B05003I_Tract.csv")
cit$Pop_Latino <- cit$B05003I_001
cit$CVAP_Latino <- cit$B05003I_006 + cit$B05003I_011 + cit$B05003I_017 + cit$B05003I_022 + cit$B05003I_004 + cit$B05003I_009 + cit$B05003I_015 + cit$B05003I_020
cit$PctImm_Latino <- (cit$B05003I_005 + cit$B05003I_010 + cit$B05003I_016 + cit$B05003I_021) / cit$B05003I_001
cit$PctCit_LatinoImm <- (cit$B05003I_006 + cit$B05003I_011 + cit$B05003I_017 + cit$B05003I_022) / (cit$B05003I_005 + cit$B05003I_010 + cit$B05003I_016 + cit$B05003I_021)
cit <- subset(cit, select=c(GEOID, NAME, Pop_Latino, CVAP_Latino, PctImm_Latino, PctCit_LatinoImm))
cit$PctCit_LatinoImm[is.nan(cit$PctCit_LatinoImm)] <- 0 # places where it is dividing by 0 because no immigrants
data <- merge(data, cit, by="GEOID", all.x=TRUE, sort=FALSE)

#C15002I - 2016-2020 Sex by Educational Attainment (Hispanic only)
educ <- read.csv("vf_vest_census/C15002I_Tract.csv")
educ$PctNoColl_Latino <- (educ$C15002I_003 + educ$C15002I_004 + educ$C15002I_008 + educ$C15002I_009)/educ$C15002I_001
educ <- subset(educ, select=c(GEOID, PctNoColl_Latino))
data <- merge(data, educ, by="GEOID", all.x=TRUE, sort=FALSE)

#B16005I- 2016-2020 Nativity by Language Spoken at Home by Ability to Speak English (Hispanic only)
lang <- read.csv("vf_vest_census/B16005I_Tract.csv")
lang$PctNoEng_LatinoNat <- lang$B16005I_006/lang$B16005I_002 # percent of native born Latinos who don't speak english very well
lang <- subset(lang, select=c("GEOID","PctNoEng_LatinoNat", "B16005I_002", "B16005I_006"))
lang$PctNoEng_LatinoNat[is.nan(lang$PctNoEng_LatinoNat)] <- 0 # places where it is dividing by 0 because no native born
data <- merge(data, lang, by="GEOID", all.x=TRUE, sort=FALSE)

## Modeling
data2 <- subset(data, Hispanic20_Pct >= 0.05 & Pop_Latino >= 1 & deviation20 < 2) # 23,293 observations, removes places with very few Latinos or where large deviation between VEST and voterfile counts

models <- list(
  "Bare" = lm(Trump20_Pct ~ Hispanic20_Pct, data=data2, weights=TotalVotes16),
  "Baseline" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct, data=data2, weights=TotalVotes16),
  "No Census" = lm(Trump20_Pct ~ Hispanic20_Pct + Hisp_NotAgeIn_Pct + Trump16_Pct, data=data2, weights=TotalVotes16),
  "SES Only" = lm(Trump20_Pct ~ Hispanic20_Pct + log(MedHHInc_Latino) + PctNoColl_Latino + Trump16_Pct, data=data2, weights=TotalVotes16),
  "Immig Only" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct + PctImm_Latino*PctCit_LatinoImm + PctNoEng_LatinoNat, data=data2, weights=TotalVotes16), 
  "Full" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct + PctImm_Latino*PctCit_LatinoImm + log(MedHHInc_Latino) + PctNoEng_LatinoNat + Hisp_NotAgeIn_Pct + PctNoColl_Latino, data=data2, weights=TotalVotes16)
)

modelsummary(models, stars=TRUE)
```

## Figure A1: Shifts in Covid Variables and Biden Intent

```{r figa1}
library(modelsummary)
library(fixest)

# Run dynamic MRP routine
source("auxiliary_analyses/dynamic_mrp.R")

kable(biden_df, "html") %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "750px", height = "500px")

biden_df %>%
 filter(!is.na(hispanic_set1)) %>%
 dplyr::group_by(hispanic_set1) %>%
 do(mod = try(broom::tidy(feols(diff ~ scale(lockdown_scale) + scale(death_change) +
                                 change_wording |
                                 state, data = .)))) %>%
  unnest(c(mod)) -> cleaned_analysis_set1

# Table A1
modelsummary(list(feols(diff ~ scale(lockdown_scale) + scale(death_change) +
                  change_wording |
                  state, data = biden_df %>% filter(hispanic_set1 == "Hispanic")),
        feols(diff ~ scale(lockdown_scale) + scale(death_change) +
                  change_wording |
                  state, data = biden_df %>% filter(hispanic_set1 == "Not Hispanic"))),
        gof_map = "all")

# Plot results
g1 <- cleaned_analysis_set1 %>%
 filter(term != "change_wording" & term != "(Intercept)") %>%
 mutate(term = plyr::mapvalues(term, 
             c("scale(lockdown_scale)", "scale(death_change)"), 
             c("Mobility Scale", "Log(COVID deaths\nper 100,000)"))) %>%
  ggplot(aes(x = term, y = estimate, shape = hispanic_set1, group = hispanic_set1)) +
  geom_pointrange(aes(ymin = estimate - 1.96*std.error,
                      ymax = estimate + 1.96*std.error),
                  position = position_dodge(.5),
                  size = 1) +
  theme_ipsum_rc() +
  labs(y = "Monthly change in Biden vote intent",
       x = "",
       shape = "") +
  coord_flip() +
  theme(legend.position = "bottom",
        axis.title.x = element_text(size = 15),
        axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15),
        legend.text = element_text(size = 15)) +
  geom_hline(yintercept = 0, linetype = "dashed") 

g1
```

## Figure A2: Shifts in Biden Intent Over Time

```{r figa2}
g2 <- state.monthly.predictions.choice_set1 %>%
 mutate(date = lubridate::ym(paste(month.year))) %>%
 filter(date < "2020-12-01") %>%
 group_by(month.year, hispanic_set1) %>%
 mutate(m_median = mean(median, na.rm = T)) %>%
 ungroup() %>%
 mutate(month.year = as.factor(plyr::mapvalues(month.year, 
                                                from = unique(month.year), to = 1:17)),
      hispanic_set1 = plyr::mapvalues(hispanic_set1, c(0,1),
       c("Non-Latino", "Latino"))) %>%
 ggplot(aes(x = month.year, y = median, group = state)) +
 facet_wrap(. ~ hispanic_set1, nrow = 1) +
 geom_line(alpha = .075) +
 geom_vline(xintercept = 11) +
 geom_line(aes(x = month.year, y = m_median), lwd = .5) +
 theme_minimal() +
 scale_x_discrete(breaks = c(1, 4, 8, 12, 17), 
                  labels = c(paste(month.abb[7:12], "\n19"), 
                             paste(month.abb, "\n20"), paste(month.abb, "\n21"))[c(1,5,8,12,17)]) +
 labs(x = "Date", y = "Monthly Estimates of Biden Support") +
  annotate("text", x = 13, y = .015, label = "BLM Protests", family = "Roboto",
           size = 3) +
  annotate("text", x = 13, y = .015, label = "BLM Protests", family = "Roboto",
           size = 3) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

g2
```

## Figure A3: Tract-level Trump Voteshare in 2020

```{r figa3}

# Load merged Voterfile and VEST data
data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv")

# Create variables 
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16
data$Nsize <- data$TotalVotes16
data$Nsize[data$Nsize > 10000] <- 10000 # topcode for point sizing ONLY

displayprefs <-  theme_bw() + theme(panel.grid.minor=element_blank(), 
                                    panel.grid.major.y=element_blank(), 
                                    panel.grid.major.x=element_blank(),
                                    panel.border=element_blank(),
                                    axis.text.x=element_text(family="Helvetica", size=12, color="black"),
                                    axis.title.x=element_text(family="Helvetica", size=14, color="black", face="bold"),
                                    axis.title.y=element_text(family="Helvetica", size=12, color="black", vjust=1.1),
                                    axis.title.y.right=element_text(family="Helvetica", size=12, color="blue", vjust=1.1),
                                    axis.text.y=element_text(family="Helvetica", size=12, color="black"),
                                    axis.text.y.right=element_text(family="Helvetica", size=10, color="blue"),
                                    axis.line=element_line(color="black", size=0.5),
                                    axis.ticks.x=element_line(color="black", size=1),
                                    axis.ticks.y.right=element_line(color="blue", size=0.5),
                                    plot.caption=element_text(color="gray40"),
                                    plot.subtitle=element_text(family="Helvetica", size=8, vjust=0),
                                    plot.title=element_text(family="Helvetica", size=14, vjust=0, face="bold"),
                                    legend.text=element_text(family="Helvetica", size=12, color="black"),
                                    legend.position="none",
                                    legend.title=element_blank())

figa3 <- ggplot(subset(data, TotalVotes16 > 100), aes(x = Hispanic20_Pct, y = Trump20_Pct, size=Nsize/100))
figa3 <- figa3 + geom_point(alpha=0.1)
figa3 <- figa3 + geom_smooth(size=2)
figa3 <- figa3 + scale_x_continuous(name="% of 2020 Voters who were Latinx", labels=percent_format(1), limits=c(0,1)) + scale_y_continuous(name="2020 Trump Two-Party %", labels=percent_format(1), breaks=seq(0,1,0.1))
figa3 <- figa3 + displayprefs

figa3
```

## Figure A4: Tract-level Trump Voteshare in 2020, Florida vs. rest of U.S.

```{r figa4}

# Load merged Voterfile and VEST data
data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv")

# Create variables 
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16
data$Nsize <- data$TotalVotes16
data$Nsize[data$Nsize > 10000] <- 10000 # topcode for point sizing ONLY

displayprefs <-  theme_bw() + theme(panel.grid.minor=element_blank(), 
                                    panel.grid.major.y=element_blank(), 
                                    panel.grid.major.x=element_blank(),
                                    panel.border=element_blank(),
                                    axis.text.x=element_text(family="Helvetica", size=12, color="black"),
                                    axis.title.x=element_text(family="Helvetica", size=14, color="black", face="bold"),
                                    axis.title.y=element_text(family="Helvetica", size=12, color="black", vjust=1.1),
                                    axis.title.y.right=element_text(family="Helvetica", size=12, color="blue", vjust=1.1),
                                    axis.text.y=element_text(family="Helvetica", size=12, color="black"),
                                    axis.text.y.right=element_text(family="Helvetica", size=10, color="blue"),
                                    axis.line=element_line(color="black", size=0.5),
                                    axis.ticks.x=element_line(color="black", size=1),
                                    axis.ticks.y.right=element_line(color="blue", size=0.5),
                                    plot.caption=element_text(color="gray40"),
                                    plot.subtitle=element_text(family="Helvetica", size=8, vjust=0),
                                    plot.title=element_text(family="Helvetica", size=14, vjust=0, face="bold"),
                                    legend.text=element_text(family="Helvetica", size=12, color="black"),
                                    legend.position="none",
                                    legend.title=element_blank())

figa4 <- ggplot(subset(data, TotalVotes16 > 100 & stfips != 12), aes(x = Hispanic20_Pct, y = Trump20_Pct, size=Nsize/100))
figa4 <- figa4 + geom_point(alpha=0.1)
figa4 <- figa4 + geom_point(data=subset(data, stfips==12 & TotalVotes16 > 100), color="red", alpha=0.1)
figa4 <- figa4 + geom_smooth(size=1)
figa4 <- figa4 + geom_smooth(data=subset(data, stfips==12 & TotalVotes16 > 100), color="red", size=2)
figa4 <- figa4 + scale_x_continuous(name="% of 2020 Voters who were Latinx", labels=percent_format(1), limits=c(0,1)) + scale_y_continuous(name="2020 Trump Two-Party %", labels=percent_format(1), breaks=seq(0,1,0.1))
figa4 <- figa4 + displayprefs

figa4
```

## Figure A5: Total Presidential votes in 2016 and 2020 in heavily-Latino tracts

```{r figa5}

# Load merged Voterfile and VEST data
data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv")

# Create variables 
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16

data_h <- subset(data, Hispanic20_Pct > 0.75)

data_h$state2 <- data_h$state
data_h$state2[!(data_h$state2 %in% c("ca","tx","fl"))] <- "Other"

data_h2 <- data_h %>%
  group_by(state2) %>%
  dplyr::summarize(Trump20 = sum(G20PRERTRU), Trump16 = sum(G16PRERTRU), Biden20 = sum(G20PREDBID), Clinton16 = sum(G16PREDCLI))

data_h2 <- melt(data_h2, id.vars="state2")
data_h2$state2 <- as.factor(data_h2$state2)
levels(data_h2$state2) <- c("California (N=224)","Florida (N=108)","Other (N=62)","Texas (N=295)")

displayprefs <-  theme_bw() + theme(panel.grid.minor=element_blank(), 
                                    panel.grid.major.y=element_blank(), 
                                    panel.grid.major.x=element_blank(),
                                    panel.border=element_blank(),
                                    axis.text.x=element_text(family="Helvetica", size=12, color="black"),
                                    axis.title.x=element_text(family="Helvetica", size=14, color="black", face="bold"),
                                    axis.title.y=element_text(family="Helvetica", size=12, color="black", vjust=1.1),
                                    axis.title.y.right=element_text(family="Helvetica", size=12, color="blue", vjust=1.1),
                                    axis.text.y=element_text(family="Helvetica", size=12, color="black"),
                                    axis.text.y.right=element_text(family="Helvetica", size=10, color="blue"),
                                    axis.line=element_line(color="black", size=0.5),
                                    axis.ticks.x=element_line(color="black", size=1),
                                    axis.ticks.y.right=element_line(color="blue", size=0.5),
                                    plot.caption=element_text(color="gray40"),
                                    plot.subtitle=element_text(family="Helvetica", size=8, vjust=0),
                                    plot.title=element_text(family="Helvetica", size=14, vjust=0, face="bold"),
                                    legend.text=element_text(family="Helvetica", size=12, color="black"),
                                    legend.position="none",
                                    legend.title=element_blank())

figa5 <- ggplot(data=data_h2, aes(x=variable, y=value, fill=variable))
figa5 <- figa5 + geom_bar(position="dodge", stat="identity")
figa5 <- figa5 + facet_wrap(~state2, scales="free", nrow=1)
figa5 <- figa5 + displayprefs
figa5 <- figa5 + scale_x_discrete(name="", labels=c("Trump\n(20)","Trump\n(16)","Biden\n(20)","Clinton\n(16)"))
figa5 <- figa5 + scale_fill_manual(values=c("red3", "firebrick1","blue3","dodgerblue1"))
figa5 <- figa5 + scale_y_continuous(name="Total Votes in Tracts 75%+ Latinx", labels=label_number(1, big.mark=","), expand=expansion(mult=c(0,0.025)))
figa5 <- figa5 + theme(axis.text.x=element_text(family="Helvetica", size=10, color="black"))

figa5
```

## Table A2: Trump 2020 voteshare - Trump 2016 voteshare in heavily-Latino tracts, state weighted averages

```{r taba2}

# Load merged Voterfile and VEST data
data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv")

# Create variables 
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16
data$TrumpChg <- data$Trump20_Pct - data$Trump16_Pct
data$deviation20 <- data$TotalVotes20/data$Total20

## Merge in 2016-2020 Census ACS demographic data at tract level ##
data$GEOID <- paste0("14000US", str_pad(data$stfips, 2, "left", pad="0"), str_pad(data$county, 3, "left", pad="0"), str_pad(data$tract, 6, "left", pad="0"))

#B05003I - 2016-2020 Sex by Age by Nativity and Citizenship Status (Hispanic only)
cit <- read.csv("vf_vest_census/B05003I_Tract.csv")
cit$Pop_Latino <- cit$B05003I_001
cit$CVAP_Latino <- cit$B05003I_006 + cit$B05003I_011 + cit$B05003I_017 + cit$B05003I_022 + cit$B05003I_004 + cit$B05003I_009 + cit$B05003I_015 + cit$B05003I_020
cit$PctImm_Latino <- (cit$B05003I_005 + cit$B05003I_010 + cit$B05003I_016 + cit$B05003I_021) / cit$B05003I_001
cit$PctCit_LatinoImm <- (cit$B05003I_006 + cit$B05003I_011 + cit$B05003I_017 + cit$B05003I_022) / (cit$B05003I_005 + cit$B05003I_010 + cit$B05003I_016 + cit$B05003I_021)
cit <- subset(cit, select=c(GEOID, NAME, Pop_Latino, CVAP_Latino, PctImm_Latino, PctCit_LatinoImm))
cit$PctCit_LatinoImm[is.nan(cit$PctCit_LatinoImm)] <- 0 # places where it is dividing by 0 because no immigrants
data <- merge(data, cit, by="GEOID", all.x=TRUE, sort=FALSE)

datast75 <- data %>%
  filter(Hispanic20_Pct >= 0.75 & Pop_Latino >= 1 & deviation20 < 2) %>%
  group_by(state) %>%
  dplyr::summarize(TrumpGain = weighted.mean(TrumpChg, w=Hispanic20))
datast75$state <- toupper(datast75$state)
datast75t <- data %>%
  filter(Hispanic20_Pct >= 0.75 & Pop_Latino >= 1 & deviation20 < 2) %>%
  dplyr::summarize(TrumpGain = weighted.mean(TrumpChg, w=Hispanic20))
datast75t$state <- "National"

datast75 <- rbind(datast75, datast75t)
datast75 <- subset(datast75, select=c(state, TrumpGain))
datast75$TrumpGain <- percent(datast75$TrumpGain, 0.1, suffix=" pp")

datast75
```

## Table A3: Tract-level Regressions of Trump 2020 voteshare, holding out CA, FL, TX

```{r taba3}

data <- read.csv("vf_vest_census/VoterFile_VEST_Merged.csv") # 67,838 observations

## Create variables in VEST/Voter File data##
data$Hispanic20_Pct <- data$Hispanic20/data$Total20
data$Hisp_NotAgeIn_Pct <- data$Hispanic20Only_22plus/data$Hispanic20Only
data$TotalVotes16 <- data$G16PRERTRU + data$G16PREDCLI
data$TotalVotes20 <- data$G20PRERTRU + data$G20PREDBID
data$Trump20_Pct <- data$G20PRERTRU/data$TotalVotes20
data$Trump16_Pct <- data$G16PRERTRU/data$TotalVotes16
data$TrumpChg <- data$Trump20_Pct - data$Trump16_Pct
data$deviation20 <- data$TotalVotes20/data$Total20

## Merge in 2016-2020 Census ACS demographic data at tract level ##
library(stringr)
data$GEOID <- paste0("14000US", str_pad(data$stfips, 2, "left", pad="0"), str_pad(data$county, 3, "left", pad="0"), str_pad(data$tract, 6, "left", pad="0"))

#B19113I - 2016-2020 Median Family Income (Hispanic head of household only, in 2020 inflation-adjusted $)
faminc <- read.csv("vf_vest_census/B19113I_Tract.csv")
faminc <- subset(faminc, select=c(GEOID, B19113I_001))
faminc <- subset(faminc, !is.na(B19113I_001))
names(faminc)[2] <- "MedHHInc_Latino"
data <- merge(data, faminc, by="GEOID", all.x=TRUE, sort=FALSE)

#B05003I - 2016-2020 Sex by Age by Nativity and Citizenship Status (Hispanic only)
cit <- read.csv("vf_vest_census/B05003I_Tract.csv")
cit$Pop_Latino <- cit$B05003I_001
cit$CVAP_Latino <- cit$B05003I_006 + cit$B05003I_011 + cit$B05003I_017 + cit$B05003I_022 + cit$B05003I_004 + cit$B05003I_009 + cit$B05003I_015 + cit$B05003I_020
cit$PctImm_Latino <- (cit$B05003I_005 + cit$B05003I_010 + cit$B05003I_016 + cit$B05003I_021) / cit$B05003I_001
cit$PctCit_LatinoImm <- (cit$B05003I_006 + cit$B05003I_011 + cit$B05003I_017 + cit$B05003I_022) / (cit$B05003I_005 + cit$B05003I_010 + cit$B05003I_016 + cit$B05003I_021)
cit <- subset(cit, select=c(GEOID, NAME, Pop_Latino, CVAP_Latino, PctImm_Latino, PctCit_LatinoImm))
cit$PctCit_LatinoImm[is.nan(cit$PctCit_LatinoImm)] <- 0 # places where it is dividing by 0 because no immigrants
data <- merge(data, cit, by="GEOID", all.x=TRUE, sort=FALSE)

#C15002I - 2016-2020 Sex by Educational Attainment (Hispanic only)
educ <- read.csv("vf_vest_census/C15002I_Tract.csv")
educ$PctNoColl_Latino <- (educ$C15002I_003 + educ$C15002I_004 + educ$C15002I_008 + educ$C15002I_009)/educ$C15002I_001
educ <- subset(educ, select=c(GEOID, PctNoColl_Latino))
data <- merge(data, educ, by="GEOID", all.x=TRUE, sort=FALSE)

#B16005I- 2016-2020 Nativity by Language Spoken at Home by Ability to Speak English (Hispanic only)
lang <- read.csv("vf_vest_census/B16005I_Tract.csv")
lang$PctNoEng_LatinoNat <- lang$B16005I_006/lang$B16005I_002 # percent of native born Latinos who don't speak english very well
lang <- subset(lang, select=c("GEOID","PctNoEng_LatinoNat", "B16005I_002", "B16005I_006"))
lang$PctNoEng_LatinoNat[is.nan(lang$PctNoEng_LatinoNat)] <- 0 # places where it is dividing by 0 because no native born
data <- merge(data, lang, by="GEOID", all.x=TRUE, sort=FALSE)

## Modeling
data2 <- subset(data, Hispanic20_Pct >= 0.05 & Pop_Latino >= 1 & deviation20 < 2) # 23,293 observations, removes places with very few Latinos or where large deviation between VEST and voterfile counts

models2 <- list(
  "Full" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct + PctImm_Latino*PctCit_LatinoImm + log(MedHHInc_Latino) + PctNoEng_LatinoNat + Hisp_NotAgeIn_Pct + PctNoColl_Latino, data=data2, weights=TotalVotes16),
  "NoFlorida" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct + PctImm_Latino*PctCit_LatinoImm + log(MedHHInc_Latino) + PctNoEng_LatinoNat + Hisp_NotAgeIn_Pct + PctNoColl_Latino, data=subset(data2, state != "fl"), weights=TotalVotes16),
  "NoTexas" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct + PctImm_Latino*PctCit_LatinoImm + log(MedHHInc_Latino) + PctNoEng_LatinoNat + Hisp_NotAgeIn_Pct + PctNoColl_Latino, data=subset(data2, state != "tx"), weights=TotalVotes16),
  "NoCalifornia" = lm(Trump20_Pct ~ Hispanic20_Pct + Trump16_Pct + PctImm_Latino*PctCit_LatinoImm + log(MedHHInc_Latino) + PctNoEng_LatinoNat + Hisp_NotAgeIn_Pct + PctNoColl_Latino, data=subset(data2, state != "ca"), weights=TotalVotes16)
)
modelsummary(models2, stars=TRUE)
```

## Figure A6: Latino shifts across states

```{r figa6}
data2 %>%
 mutate(Shift = Trump20_Pct-Trump16_Pct) %>%
 group_by(state) %>%
 mutate(max_hisp = max(Hispanic20_Pct),
        mean_hisp = mean(Hispanic20_Pct)) %>%
 filter(max_hisp >= .5) %>%
 mutate(state = toupper(state)) %>%
 ggplot(aes(x = Hispanic20_Pct,
            y = Shift,
            weight = TotalVotes16)) +
 facet_wrap(. ~ forcats::fct_reorder(state,
                                     -mean_hisp)) +
 geom_point(alpha = .01) +
 geom_smooth(color = "red", se = FALSE, lwd = 1) +
 theme_ipsum_rc() +
 labs(y = "Trump Shift (2016-2020)",
      x = "Pct. Latino (2020)") +
 theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       axis.text.y = element_text(size = 14),
       axis.title.x = element_text(size = 14),
       axis.title.y = element_text(size = 14)) +
 geom_hline(yintercept = 0,
            linetype = "dashed") 
```

## Figure A7: Net vote shifts (2020-2022)

```{r figa8}
# load CES surveys
CES_2020 <- rio::import("2020_CES/CES20_Common_OUTPUT_vv.dta")
CES_2022 <- rio::import("2022_CES/CCES22_Common_OUTPUT_vv_topost.dta", encoding = "UTF-8")

# load weights
weights_2020 <- rio::import("weights/CES_2020_LatWeights.csv")
weights_2022 <- rio::import("weights/CES_2022_LatWeights.csv")

# subset on Latino and clean 2020 variables
CES_2020_sub <- CES_2020 %>%
 filter(race == 3 | hispanic == 1) %>%
 transmute(caseid,
           state = inputstate,
           gender = as.numeric(gender == 2),
           voted = as.numeric(!is.na(CL_2020gvm)),
           vote.choice = ifelse(CC20_410 > 4, NA, 
                                CC20_410), #1
           retro.econ = ifelse(CC20_302 > 5, NA, CC20_302),
           ideo5 = ifelse(ideo5 > 5, NA, ideo5),
           religion = plyr::mapvalues(religpew, c(3:8, 9:11, 12, 98, 99), c(rep(3, 6), rep(4, 3), 3, NA, NA)),
           age_group = plyr::mapvalues(cut(2020 - birthyr,
                                           c(18, 35, 55, 100),
                                           include.lowest = F,
                                           right = F), c("[18,35)", "[35,55)", "[55,100)"),
                                       c("age1834", "age3554", "age55plus")),
           pid3 = ifelse(pid3 > 3, NA, pid3),
           imm.stat = ifelse(immstat > 5, NA, immstat),
           crime1 = as.numeric(CC20_334a == 2),
           crime2 = as.numeric(CC20_334b == 2),
           crime3 = as.numeric(CC20_334c == 1),
           crime5 = as.numeric(CC20_334d == 2),
           crime6 = as.numeric(CC20_334e == 2),
           crime7 = as.numeric(CC20_334f == 2),
           crime8 = as.numeric(CC20_334g == 2),
           crime9 = as.numeric(CC20_334h == 2),
           smedia1 = as.numeric(CC20_300d_1 == 1),
           smedia2 = as.numeric(CC20_300d_2 == 1),
           smedia3 = as.numeric(CC20_300d_3 == 1),
           smedia4 = as.numeric(CC20_300d_4 == 1),
           smedia5 = as.numeric(CC20_300d_5 == 1),
           mig1 = as.numeric(CC20_331a == 2),
           mig2 = as.numeric(CC20_331b == 1),
           mig5 = as.numeric(CC20_331c == 1),
           mig6 = as.numeric(CC20_331d == 1),
           mig7 = as.numeric(CC20_331e == 1),
           educ = ifelse(educ > 6, NA, educ), 
           faminc = ifelse(faminc_new > 16, NA, faminc_new), 
           mexican = as.numeric(CC20_hisp_3 == 1),
           pr = as.numeric(CC20_hisp_4 == 1),
           cuban = as.numeric(CC20_hisp_5 == 1),
           weight = commonweight,
           vvweight = vvweight) %>%
 left_join(weights_2020)

CES_2020_sub[which(is.na(CES_2020_sub$eb_weights)), 
             "eb_weights"] <- mean(CES_2020_sub$eb_weights, na.rm = T)

# Create variables for Senate and House candidate information for each participant based on their state of residence:
CES_2022 <- CES_2022 %>%
 left_join(CES_2022 %>%
            group_by(inputstate) %>%
            summarise(SenateCand1Info = first(paste(SenCand1Name, "(", SenCand1Party, ")")),
                      SenateCand2Info = first(paste(SenCand2Name, "(", SenCand2Party, ")")),
                      SenateCand3Info = first(paste(SenCand3Name, "(", SenCand3Party, ")")),
                      SenateCand4Info = first(paste(SenCand4Name, "(", SenCand4Party, ")")),
                      HouseCand1Info = first(paste(HouseCand1Name, "(", HouseCand1Party, ")")),
                      HouseCand2Info = first(paste(HouseCand2Name, "(", HouseCand2Party, ")")),
                      HouseCand3Info = first(paste(HouseCand3Name, "(", HouseCand3Party, ")")),
                      HouseCand4Info = first(paste(HouseCand4Name, "(", HouseCand4Party, ")")),
                      HouseCand5Info = first(paste(HouseCand5Name, "(", HouseCand5Party, ")")),
                      HouseCand6Info = first(paste(HouseCand6Name, "(", HouseCand6Party, ")"))),
           by = "inputstate")

# Updated code to handle the Voted_Democrat variable for both Senate and House data:
CES_2022 <- CES_2022 %>%
 mutate(HouseVoteChoice_Info = case_when(
  CC22_412 == 1 ~ HouseCand1Info,
  CC22_412 == 2 ~ HouseCand2Info,
  CC22_412 == 3 ~ HouseCand3Info,
  CC22_412 == 4 ~ HouseCand4Info,
  CC22_412 == 5 ~ HouseCand5Info,
  CC22_412 == 6 ~ HouseCand6Info,
  CC22_412 == 10 ~ "Other",
  CC22_412 == 98 ~ "I'm not sure",
  CC22_412 == 99 ~ "I didn't vote in this election",
  TRUE ~ NA_character_  # Handle other cases (e.g., missing values) as NA
 ),
 Voted_Democrat_House = case_when(
  CC22_412 %in% 1:6 & (grepl("Democratic", HouseVoteChoice_Info)) ~ 1,
  CC22_412 %in% 1:6 & (grepl("Republican", HouseVoteChoice_Info)) ~ 2,
  TRUE ~ NA_integer_  # Participants who didn't vote are coded as NA
 )
 )

# Second step: subset on Latino and clean 2022 variables
CES_2022_sub <- CES_2022 %>%
 filter(race == 3 | hispanic == 1) %>% 
 mutate(fips = as.numeric(countyfips)) %>% 
 transmute(caseid,
           state = inputstate,
           gender = ifelse(gender4 > 2, NA, as.numeric(gender4 == 2)), #removes non-binary and other.
           voted = ifelse(TS_g2022 == 7 | is.na(TS_g2022), 0, 1),
           vote.choice = Voted_Democrat_House,  
           retro.econ = ifelse(CC22_302 > 5, NA, CC22_302),
           ideo5 = ifelse(ideo5 > 5, NA, ideo5),
           religion = plyr::mapvalues(religpew, c(3:8, 9:11, 
                                                  12, 
                                                  98, 99), c(rep(3, 6), rep(4, 3), 
                                                             3, 
                                                             NA, NA)),
           age_group = plyr::mapvalues(cut(2022 - birthyr,
                                           c(18, 35, 55, 100),
                                           include.lowest = F,
                                           right = F), c("[18,35)", "[35,55)", "[55,100)"),
                                       c("age1834", "age3554", "age55plus")),
           pid3 = ifelse(pid3 > 3, NA, pid3),
           imm.stat = ifelse(immstat > 5, NA, immstat),
           
           crime1 = as.numeric(CC22_334a == 2),
           crime2 = as.numeric(CC22_334b == 2),
           crime3 = as.numeric(CC22_334c == 1),
           crime5 = as.numeric(CC22_334d == 2),
           crime6 = as.numeric(CC22_334e == 2),
           crime7 = as.numeric(CC22_334f == 2),
           crime8 = as.numeric(CC22_334g == 2),
           crime9 = as.numeric(CC22_334h == 2),
           
           smedia1 = as.numeric(CC22_300d_1 == 1),
           smedia2 = as.numeric(CC22_300d_2 == 1),
           smedia3 = as.numeric(CC22_300d_3 == 1),
           smedia4 = as.numeric(CC22_300d_4 == 1),
           smedia5 = as.numeric(CC22_300d_5 == 1),
           
           mig1 = as.numeric(CC22_331a == 2),   
           mig2 = as.numeric(CC22_331b == 1),
           mig6 = as.numeric(CC22_331c == 1),
           mig7 = as.numeric(CC22_331d == 1),
           
           educ = ifelse(educ > 6, NA, educ), 
           faminc = ifelse(faminc_new > 16, NA, faminc_new), 
           mexican = as.numeric(CC22_hisp_3 == 1),
           pr = as.numeric(CC22_hisp_4 == 1),
           cuban = as.numeric(CC22_hisp_5 == 1),
           weight = commonweight) %>%
 left_join(weights_2022)

CES_2022_sub[which(is.na(CES_2022_sub$eb_weights)), 
             "eb_weights"] <- mean(CES_2022_sub$eb_weights, na.rm = T)

# stack CES surveys
CES_sub <- bind_rows(CES_2020_sub %>% mutate(year = 2020), 
                     CES_2022_sub %>% mutate(year = 2022))

CES_sub <- CES_sub %>% mutate(State = plyr::mapvalues(state, 
                                                      sjlabelled::get_values(CES_2020$inputstate), 
                                                      sjlabelled::get_labels(CES_2020$inputstate))) %>%
 mutate(region = state.region[match(State, state.name)])

# additional cleaning
CES_sub <- CES_sub %>% 
 mutate(fam5 = ntile(faminc, 5)) %>%
 bind_cols(police = fscores(mirt(CES_sub %>% 
                                  dplyr::select(starts_with("crime")),
                                 model = 1, verbose = FALSE, itemtype = "2PL"))[,1],
           media = fscores(mirt(CES_sub %>% 
                                 dplyr::select(starts_with("smedia")),
                                model = 1, verbose = FALSE, itemtype = "2PL"))[,1],
           imm = fscores(mirt(CES_sub %>% 
                               dplyr::select(starts_with("mig")),
                              model = 1, verbose = FALSE, itemtype = "2PL"))[,1]) %>%
 mutate(educ = plyr::mapvalues(educ, 1:6, c(1,1,2,2,3,3))) %>%
 mutate(region = plyr::mapvalues(region, c("Northeast", "South",
                                           "North Central", "West"),
                                 c(1, 2, 3, 4))) %>%
 mutate(age_group = as.numeric(age_group))

CES_sub$police <- ntile(CES_sub$police, 4)
CES_sub$imm <- ntile(CES_sub$imm, 4)
CES_sub$media <- ntile(CES_sub$media, 4)
CES_sub$gender <- CES_sub$gender+1

## ALERT: ASSUME TURNOUT STABILITY

# implement GMTL method
GMTL <- function(data, indices, variable, value, ...) {
 
 # for bootstrapping
 data <- data[indices,]
 
 # Set up weights for 2020
 wgs.2020 <- svydesign(ids = ~1, weights = ~ eb_weights, 
                       data = data %>% filter(year == 2020) %>% 
                        mutate(vote.choice.1 = as.numeric(vote.choice == 1),
                               vote.choice.2 = as.numeric(vote.choice == 2),
                               vote.subgroup = as.numeric(voted == 1 & get(variable) == value),
                               subgroup = as.numeric(get(variable) == value)))
 
 # Set up weights for 2022
 wgs.2022 <- svydesign(ids = ~1, weights = ~ eb_weights, 
                       data = data %>% filter(year == 2022) %>%
                        mutate(vote.choice.1 = as.numeric(vote.choice == 1),
                               vote.choice.2 = as.numeric(vote.choice == 2),
                               vote.subgroup = as.numeric(voted == 1 & get(variable) == value),
                               subgroup = as.numeric(get(variable) == value)) %>%
                        filter(!is.na(voted)))
 
 # calculate weighted estimate of Pr(Vote Choice | Subgroup, 2020)
 vote1.2020 <- sum(coef(svyglm(vote.choice.1 ~ subgroup, wgs.2020,
                               subset = voted == 1)))
 vote2.2020 <- sum(coef(svyglm(vote.choice.2 ~ subgroup, wgs.2020,
                               subset = voted == 1)))
 
 # calculate weighted estimate of Pr(Vote Choice | Subgroup, 2022)
 vote1.2022 <- sum(coef(svyglm(vote.choice.1 ~ subgroup, wgs.2022,
                               subset = voted == 1)))
 vote2.2022 <- sum(coef(svyglm(vote.choice.2 ~ subgroup, wgs.2022,
                               subset = voted == 1)))
 
 # calculate weighted estimate of Pr(Turnout | Subgroup, 2020)
 turnout.2020 <- sum(coef(svyglm(voted ~ subgroup, wgs.2020)))
 # calculate weighted estimate of Pr(Turnout | Subgroup, 2022)  
 turnout.2022 <- turnout.2020
 
 # calculate weighted estimate of Pr(Subgroup | 2020)
 px.2020 <- svymean(~subgroup, wgs.2020, na.rm = T)
 # calculate weighted estimate of Pr(Subgroup | 2022)
 px.2022 <- svymean(~subgroup, wgs.2022, na.rm = T)
 
 # Return net vote variable and individual variables used to construct it
 return(c((((vote1.2022 - vote2.2022) * turnout.2022 * px.2022) - 
            ((vote1.2020 - vote2.2020) * turnout.2020 * px.2020)),
          vote1.2020, vote1.2022, vote2.2020, vote2.2022,
          turnout.2020, turnout.2022, px.2020, px.2022))
 
}

set.seed(33014)

# begin bootstrapping routine
t1_2022 <- CES_sub %>%
 summarise_each(~n_distinct(.x, na.rm = T)) %>% 
 # select subgroup vars
 dplyr::select(imm.stat, pid3, ideo5, age_group, gender,
               educ, fam5, police, imm, 
               mexican, cuban, pr,
               region, media, religion) %>%
 gather(x, y) %>% 
 dplyr::group_by(x) %>%
 dplyr::summarize(value = 1:y) %>%
 dplyr::rename(variable = x) %>%
 ungroup() %>%
 as.list() %>%
 pmap(\(variable, value) broom::tidy(boot(statistic = GMTL, 
                                          variable = variable, 
                                          value = value, 
                                          R = 1000, data = CES_sub,
                                          ncpus = 10,
                                          parallel = "multicore")) %>% 
       mutate(term = c("shift", 
                       "vote1.2020", "vote1.2022", 
                       "vote2.2020", "vote2.2022", 
                       "turnout.2020", "turnout.2022", 
                       "px.2020", "px.2022")) %>% 
       dplyr::select(term, statistic, std.error) %>%
       bind_cols(variable = variable, value = value)) %>% 
 bind_rows() 

# set up data for plots

t1_2022 %>%
 filter(variable == "mexican" & value == 1 |
         variable == "cuban" & value == 1 |
         variable == "pr" & value == 1 |
         variable != "pr" & variable != "mexican" & variable != "cuban") -> t1_2022.r

#t1_2022.r[which(t1_2022.r$variable == "mexican"),"value"]
t1_2022.r[which(t1_2022.r$variable == "cuban"),"value"] <- 2
t1_2022.r[which(t1_2022.r$variable == "pr"),"value"] <- 3

t1_2022.r %>%
 filter(variable != "imm.stat" | 
         variable == "imm.stat" & value != 2) %>%
 mutate(variable = plyr::mapvalues(variable, c("mexican", "cuban", "pr"),
                                   c("ancestry", "ancestry", "ancestry"))) %>%
 nest(statistic, std.error, .key = 'value_col') %>%
 spread(key = term, value = value_col) %>%
 mutate(value = ifelse(variable == "educ", plyr::mapvalues(value, 
                                                           1:3,
                                                           c("  No college", 
                                                             " Some College", 
                                                             "College+")),
                       value)) %>%
 mutate(value = ifelse(variable == "age_group", plyr::mapvalues(value, 
                                                                1:3,
                                                                c("18-34", 
                                                                  "34-55", 
                                                                  "55+")),
                       value)) %>%
 mutate(value = ifelse(variable == "fam5", plyr::mapvalues(value, 1:5,
                                                           c("Quantile 1", 
                                                             "Quantile 2", 
                                                             "Quantile 3", 
                                                             "Quantile 4", 
                                                             "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "ideo5", plyr::mapvalues(value, 1:5,
                                                            c("   Very\nLiberal", 
                                                              " Liberal", 
                                                              " Mod.", 
                                                              "Conservative", 
                                                              "Very\nConservative")),
                       value)) %>%
 mutate(value = ifelse(variable == "imm.stat", plyr::mapvalues(value, 1:5,
                                                               c(" Immigrant\nCitizen", 
                                                                 "", 
                                                                 "1st\nGeneration", 
                                                                 "2nd\nGeneration", 
                                                                 "3rd\nGeneration")),
                       value)) %>% 
 mutate(value = ifelse(variable == "pid3", plyr::mapvalues(value, 1:3,
                                                           c("Democrat",
                                                             "Republican",
                                                             "Independent")),
                       value)) %>%
 mutate(value = ifelse(variable == "police", plyr::mapvalues(value, 1:5,
                                                             c("Quantile 1", 
                                                               "Quantile 2", 
                                                               "Quantile 3", 
                                                               "Quantile 4", 
                                                               "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "region", plyr::mapvalues(value, 1:4,
                                                             c("Northeast", "South",
                                                               "North Central", "West")),
                       value)) %>%
 mutate(value = ifelse(variable == "imm", plyr::mapvalues(value, 1:5,
                                                          c("Quantile 1", 
                                                            "Quantile 2", 
                                                            "Quantile 3", 
                                                            "Quantile 4", 
                                                            "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "media", plyr::mapvalues(value, 1:5,
                                                            c("Quantile 1", 
                                                              "Quantile 2", 
                                                              "Quantile 3", 
                                                              "Quantile 4", 
                                                              "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "ancestry", plyr::mapvalues(value, 1:3,
                                                               c("Mexican", 
                                                                 "Cuban", 
                                                                 "Puerto\nRican")),
                       value)) %>%
 mutate(value = ifelse(variable == "gender", plyr::mapvalues(value, 1:2,
                                                             c("M", 
                                                               "F")),
                       value)) %>%
 mutate(value = ifelse(variable == "religion", plyr::mapvalues(value, 1:4,
                                                               c("Protestant", 
                                                                 "Catholic",
                                                                 "Other",
                                                                 "Atheist/Agnostic")),
                       value)) %>%
 mutate(variable = plyr::mapvalues(variable, c("imm", "police",
                                               "ideo5", "fam5",
                                               "educ", "pid3", "imm.stat",
                                               "media", "ancestry", "age_group",
                                               "region", "gender",  "religion"), c("Immigration Restrictionism",
                                                                                            "Conservative Crime Policy Support", "Ideology",
                                                                                            "Income", "Education", "Party",
                                                                                            "Generational Status", "Social Media Usage",
                                                                                            "Ancestry", "Age Group",
                                                                                            "Region", "Sex", "Religion"))) %>%
 unnest(px.2020, px.2022, 
        shift, turnout.2020, turnout.2022,
        vote1.2020, vote2.2020,
        vote1.2022, vote2.2022, .sep = '_') %>%
 transmute(variable, value, `Net D vote increase among Latinos` =
            shift_statistic,
           `Difference in subgroup D support` = 
            vote1.2022_statistic - vote1.2020_statistic,
           `Difference in subgroup turnout` = 
            turnout.2022_statistic - turnout.2020_statistic,
           `Difference in subgroup composition` = 
            px.2022_statistic - px.2020_statistic,
           `Difference in subgroup D-R margin` =
            (vote1.2022_statistic - vote2.2022_statistic) -
            (vote1.2020_statistic - vote2.2020_statistic),
           `Proportionate change in subgroup D support` = 
            (vote1.2022_statistic - vote1.2020_statistic)/vote1.2020_statistic,
           `Proportionate change in D-R margin` = 
            ((vote1.2022_statistic - vote2.2022_statistic) -
              (vote1.2020_statistic - vote2.2020_statistic))/(vote1.2020_statistic - vote2.2020_statistic),
           SE = shift_std.error) %>%
 gather(term, estimate, 
        `Net D vote increase among Latinos`:
         `Proportionate change in D-R margin`) -> t2_2022

kable(t2_2022, "html") %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "750px", height = "500px")

p1 <- t2_2022 %>%
 mutate(estimate = -estimate) %>%
 filter(value != "") %>%
 filter(term == "Net D vote increase among Latinos") %>%
 mutate(variable = fct_reorder(variable, -abs(estimate))) %>%
 mutate(` ` = term) %>%
 ggplot(aes(x = value, y = estimate, group = ` `)) +
 geom_pointrange(aes(ymin = estimate - 1.96*SE,
                     ymax = estimate + 1.96*SE)) +
 geom_line() +
 facet_wrap(. ~ variable, scales = "free") +
 theme_minimal() +
 geom_hline(yintercept = 0, linetype = "dashed") +
 guides(shape=guide_legend(nrow=2,byrow=TRUE)) +
 scale_alpha(guide = 'none') +
 scale_shape_discrete(c(1,2,3,4)) +
 # Note: estimate above is reversed to display Net R vote increase
 labs(x = "", y = "Increase in Net Republican Votes") +
 theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       axis.text.x = element_text(size = 8),
       legend.position = "bottom")

p1

```

## Figure A8: Decomposing Net Votes (2020-2022)

```{r figa9}
# constituent estimates of net vote increase (2020-2022)
p2 <- t2_2022 %>%
 filter(value != "") %>%
 mutate(t_stat = as.numeric(abs(estimate/SE) >= 1.645)) %>%
 filter(variable != "Unemployment Rate") %>%
 filter(term == "Net D vote increase among Latinos") %>%
 dplyr::group_by(variable) %>%
 dplyr::summarize(t_max = sum(t_stat)) %>%
 dplyr::filter(t_max > 0) %>%
 dplyr::select(variable) %>%
 left_join(t2_2022) %>%
 mutate(estimate = ifelse(term == "Difference in subgroup D support", -estimate,
                          estimate)) %>%
 mutate(term = plyr::mapvalues(term, "Difference in subgroup D support",
                               "Difference in subgroup R support")) %>%
 filter(term == "Difference in subgroup composition" |
         term == "Difference in subgroup R support" |
         term == "Difference in subgroup turnout") %>%
 mutate(estimate_r = round(estimate, 1)) %>%
 mutate(` ` = term) %>%
 ggplot(aes(x = value, y = estimate, group = ` `, color = ` `)) +
 geom_point(size = 3) +
 geom_line() +
 facet_wrap(. ~ variable, scales = "free") +
 theme_minimal() +
 geom_hline(yintercept = 0, linetype = "dashed") +
 guides(shape=guide_legend(nrow=2,byrow=TRUE)) +
 scale_alpha(guide = 'none') +
 scale_shape_discrete(c(1,2,3,4)) +
 scale_color_grey() +
 labs(x = "", y = "Percentage Point Change") +
 theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       axis.text.x = element_text(size = 14),
       axis.text.y = element_text(size = 14),
       legend.text = element_text(size = 14),
       axis.title.y = element_text(size = 14),
       axis.title.x = element_text(size = 14),
       legend.position = "bottom")

p2

```

## Figure A9: Sensitivity analysis

```{r figa10}
net_votes_estimate <- function(vote1.2020, vote1.2022,
                               vote2.2020, vote2.2022,
                               turnout.2020, turnout.2022,
                               px.2020, px.2022) {
 
 return(((vote1.2022 - vote2.2022) * turnout.2022 * px.2022) - 
         ((vote1.2020 - vote2.2020) * turnout.2020 * px.2020))
 
}

t1_2022 %>%
 select(-std.error) %>%
 group_by(variable, value) %>%
 tidyr::pivot_wider(names_from = term, values_from = statistic) -> row_wise_df

find_min_turnout <- function(row) {
 
 turnout_range <- seq(0, 1, by = 0.01)
 
 for (turnout_2022 in turnout_range) {
  net_votes <- net_votes_estimate(row[["vote1.2020"]], row[["vote1.2022"]],
                                  row[["vote2.2020"]], row[["vote2.2022"]],
                                  row[["turnout.2020"]], turnout_2022,
                                  row[["px.2020"]], row[["px.2022"]])
  if (net_votes > 0) {
   return(turnout_2022)
  }
 }
 # Return NA if no positive net votes estimate was found
 return(NA)
}

row_wise_df %>%
 rowwise() %>%
 mutate(min_turnout_2022 = find_min_turnout(cur_data())) %>%
 ungroup() %>%
 mutate(variable = paste(variable), 
        value = paste(value)) -> row_wise_df2

kable(row_wise_df2, "html") %>%
  kable_styling("striped", full_width = F) %>%
  scroll_box(width = "750px", height = "500px")

row_wise_df2 %>%
 mutate(variable = plyr::mapvalues(variable, c("mexican", "cuban", "pr"),
                                   c("ancestry", "ancestry", "ancestry"))) %>%
 mutate(value = ifelse(variable == "educ", plyr::mapvalues(value, 
                                                           1:3,
                                                           c("  No college", 
                                                             " Some College", 
                                                             "College+")),
                       value)) %>%
 mutate(value = ifelse(variable == "age_group", plyr::mapvalues(value, 
                                                                1:3,
                                                                c("18-34", 
                                                                  "34-55", 
                                                                  "55+")),
                       value)) %>%
 mutate(value = ifelse(variable == "fam5", plyr::mapvalues(value, 1:5,
                                                           c("Quantile 1", 
                                                             "Quantile 2", 
                                                             "Quantile 3", 
                                                             "Quantile 4", 
                                                             "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "ideo5", plyr::mapvalues(value, 1:5,
                                                            c("Very Liberal", 
                                                              "Liberal", 
                                                              "Mod.", 
                                                              "Conservative", 
                                                              "Very Conservative")),
                       value)) %>%
 mutate(value = ifelse(variable == "imm.stat", plyr::mapvalues(value, 1:5,
                                                               c(" Immigrant\nCitizen", 
                                                                 "", 
                                                                 "1st Generation", 
                                                                 "2nd Generation", 
                                                                 "3rd Generation")),
                       value)) %>% 
 mutate(value = ifelse(variable == "pid3", plyr::mapvalues(value, 1:3,
                                                           c("Democrat",
                                                             "Republican",
                                                             "Independent")),
                       value)) %>%
 mutate(value = ifelse(variable == "police", plyr::mapvalues(value, 1:5,
                                                             c("Quantile 1", 
                                                               "Quantile 2", 
                                                               "Quantile 3", 
                                                               "Quantile 4", 
                                                               "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "region", plyr::mapvalues(value, 1:4,
                                                             c("Northeast", "South",
                                                               "North Central", "West")),
                       value)) %>%
 mutate(value = ifelse(variable == "imm", plyr::mapvalues(value, 1:5,
                                                          c("Quantile 1", 
                                                            "Quantile 2", 
                                                            "Quantile 3", 
                                                            "Quantile 4", 
                                                            "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "media", plyr::mapvalues(value, 1:5,
                                                            c("Quantile 1", 
                                                              "Quantile 2", 
                                                              "Quantile 3", 
                                                              "Quantile 4", 
                                                              "Quantile 5")),
                       value)) %>%
 mutate(value = ifelse(variable == "ancestry", plyr::mapvalues(value, 1:3,
                                                               c("Mexican", 
                                                                 "Cuban", 
                                                                 "Puerto Rican")),
                       value)) %>%
 mutate(value = ifelse(variable == "gender", plyr::mapvalues(value, 1:2,
                                                             c("M", 
                                                               "F")),
                       value)) %>%
 mutate(value = ifelse(variable == "religion", plyr::mapvalues(value, 1:4,
                                                               c("Protestant", 
                                                                 "Catholic",
                                                                 "Other",
                                                                 "Atheist/Agnostic")),
                       value)) %>%
 mutate(variable = plyr::mapvalues(variable, c("imm", "police",
                                               "ideo5", "fam5",
                                               "educ", "pid3", "imm.stat",
                                               "media", "ancestry", "age_group",
                                               "region", "gender",  "religion"), c("Immigration Restrictionism",
                                                                                            "Conservative Crime Policy Support", "Ideology", "Income", "Education", "Party", "Generational Status", "Social Media Usage", "Ancestry", "Age Group",
"Region", "Sex", "Religion"))) %>%
 na.omit() %>%
 mutate(variable = paste(variable, value, sep = ": "),
        gap = (min_turnout_2022 - turnout.2020)) %>% 
 mutate(variable = gsub("\\bGeneration\\b", "", variable)) %>%
 filter(min_turnout_2022 != 0) %>%
 ggplot(aes(y = forcats::fct_reorder(variable, gap))) +
 geom_segment(aes(x = turnout.2020, yend = variable, xend = min_turnout_2022, group = variable), size = 0.5) +
 geom_point(aes(x = turnout.2020, color = "2020 Turnout"), size = 5) +
 geom_point(aes(x = min_turnout_2022, color = "Required Turnout"), size = 5) +
 theme_minimal() +
 scale_color_manual(name = "", values = c("2020 Turnout" = "darkgrey", "Required Turnout" = "salmon")) +
 labs(x = "Turnout Rate", y = "Variable") +
 theme(panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       axis.title.x = element_text(size = 16),
       axis.text.x = element_text(size = 16),
       legend.position = "bottom")
```
